#Compiled code, Gov 2001, Mass Conspiracy Theories 
library(Amelia)
library(MatchIt)
library(glmnet)
library(mice)
library(cape)
library(ade4)

setwd("Spring 2015/Gov 2001/Replication Paper/Replication")

# Load the 2011 CCES Chicago module export and discard unwanted columns ----
# Earlier SPSS-based load, kept for reference:
#full_data_original <- as.data.frame(read.spss("CCES11_CHI_OUTPUT.sav", reencode='utf-8'))
full_data <- read.csv(
  "15Apr_Data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Columns to discard: every column whose name contains "_t" (presumably the
# open-ended text responses -- confirm against the codebook) plus a fixed list
# of additional items.
verbatims <- c(
  grep("_t", colnames(full_data), value = TRUE),
  "CC301b", "CC301c", "CC301d",
  "V245", "V529", "V501", "V513", "V521", "V533",
  "V536", "V579", "V276", "V279", "V280", "V210"
)
full_data <- full_data[, !(colnames(full_data) %in% verbatims)]

# Question-specific recoding of answer labels to numeric codes ----
#
# These recodes must run BEFORE the generic label pass further down: several
# labels used here ("Some of the time", "Most of the time", "No", ...) get a
# different code in that pass, and a cell recoded here no longer matches it.

# Replace answer labels by their codes in the given columns.
#
# data:    data frame to recode.
# columns: character vector of column names; names absent from `data` are
#          skipped silently.
# codes:   named numeric vector, names = exact answer labels, values = codes.
#
# Only character columns are touched. Matching is exact (case- and
# whitespace-sensitive). Codes are stored as text ("1", "2", ...) because the
# column still holds unmatched labels; conversion to numeric happens later in
# the script. Returns the modified data frame.
recode_labels <- function(data, columns, codes) {
  for (column in intersect(columns, colnames(data))) {
    answers <- data[[column]]
    if (!is.character(answers)) {
      next
    }
    position <- match(answers, names(codes))
    matched <- !is.na(position)
    answers[matched] <- unname(codes)[position[matched]]
    data[[column]] <- answers
  }
  data
}

full_data <- recode_labels(full_data, "randrev_CC343", c(
  "Opponent / Supporter" = 0,
  "Supporter / Opponent" = 1
))

# The trailing space inside these labels is intentional: it is what the
# original recode matched on.
full_data <- recode_labels(full_data, "CHI442", c(
  "No " = 0,
  "Yes " = 1
))

full_data <- recode_labels(full_data, "CC337", c(
  "Less than $2,500" = 1,
  "$2,500 to $10,000" = 2,
  "$10,000 to $20,000" = 3,
  "$20,000 to $50,000" = 4,
  "More than $50,000" = 5
))

full_data <- recode_labels(full_data, c("V246", "V247"), c(
  "Less than $10,000" = 1,
  "$10,000 - $19,999" = 2,
  "$20,000 - $29,999" = 3,
  "$30,000 - $39,999" = 4,
  "$40,000 - $49,999" = 5,
  "$50,000 - $59,999" = 6,
  "$60,000 - $69,999" = 7,
  "$70,000 - $79,999" = 8,
  "$80,000 - $99,999" = 9,
  "$100,000 - $119,999" = 10,
  "$120,000 - $149,999" = 11,
  "$150,000 - $199,999" = 12,
  "$200,000 - $249,999" = 13,
  "$250,000 - $349,999" = 14,
  "$350,000 - $499,999" = 15,
  "$500,000 or more" = 16
))

full_data <- recode_labels(full_data, "V244", c(
  "Most of the time" = 1,
  "Some of the time" = 2,
  "Only now and then" = 3,
  "Hardly at all" = 4
))

full_data <- recode_labels(full_data, "V216", c(
  "Very important" = 1,
  "Somewhat important" = 2,
  "Not too important" = 3,
  "Not at all important" = 4
))

full_data <- recode_labels(full_data, "V217", c(
  "More than once a week" = 1,
  "Once a week" = 2,
  "Once or twice a month" = 3,
  "A few times a year" = 4,
  "Seldom" = 5,
  "Never" = 6
))

# FIX: "Once a week" used to share code 3 with "A few times a week", which
# collapsed two adjacent categories; the seven labels now map to 1-7.
full_data <- recode_labels(full_data, "V218", c(
  "Several times a day" = 1,
  "Once a day" = 2,
  "A few times a week" = 3,
  "Once a week" = 4,
  "A few times a month" = 5,
  "Seldom" = 6,
  "Never" = 7
))

# FIX: previously only "Very liberal" and "Moderate" were coded here, so the
# generic pass later coded "Conservative" as 6 (its 7-point code) on a scale
# whose midpoint is 3. The remaining labels are coded here so the scale is a
# consistent 1-5.
# NOTE(review): the label spellings "Liberal", "Conservative" and
# "Very conservative" are assumed from the pattern of the two original labels;
# confirm against the data.
full_data <- recode_labels(full_data, "V243", c(
  "Very liberal" = 1,
  "Liberal" = 2,
  "Moderate" = 3,
  "Conservative" = 4,
  "Very conservative" = 5
))

# FIX: code 6 was assigned to "Not very strong Democrat" a second time (a
# no-op, since those cells were already recoded to 2), leaving
# "Not very strong Republican" uncoded.
full_data <- recode_labels(full_data, "V212d", c(
  "Strong Democrat" = 1,
  "Not very strong Democrat" = 2,
  "Lean Democrat" = 3,
  "Independent" = 4,
  "Lean Republican" = 5,
  "Not very strong Republican" = 6,
  "Strong Republican" = 7
))

full_data <- recode_labels(full_data, c("CC323a", "CC323b", "CC323c"), c(
  "Strongly Approve" = 1,
  "Approve" = 2,
  "Disapprove" = 3,
  "Strongly Disapprove" = 4
))

full_data <- recode_labels(full_data, "CC350", c(
  "Global climate change has been established as a serious problem, and immediate action is necessary." = 1,
  "There is enough evidence that climate change is taking place and some action should be taken." = 2,
  "We don't know enough about global climate change, and more research is necessary before we take any actions." = 3,
  "Concern about global climate change is exaggerated. No action is necessary." = 4,
  "Global climate change is not occurring; this is not a real issue." = 5
))

# FIX: code 3 used to be attached to a climate-change label copy-pasted from
# CC350, which cannot occur in this abortion item; that dead entry is removed.
# Codes 4 and 5 are kept as they were so downstream values do not change.
# NOTE(review): confirm whether a further abortion label was meant to be
# code 3. The fourth label below is cut off ("...only after the ne") exactly
# as in the original recode, presumably because the export truncates long
# labels.
full_data <- recode_labels(full_data, "CC352", c(
  "By law, abortion should never be permitted" = 1,
  "The law should permit abortion only in case of rape, incest or when the woman's life is in danger" = 2,
  "The law should permit abortion for reasons other than rape, incest, or danger to the woman's life, but only after the ne" = 4,
  "By law, a woman should always be able to obtain an abortion as a matter of personal choice" = 5
))

full_data <- recode_labels(full_data, "CC354", c(
  "Strongly support" = 1,
  "Somewhat support" = 2,
  "Somewhat oppose" = 3,
  "Strongly oppose" = 4
))

# Only the two "Somewhat" labels are handled here; "Strongly agree",
# "Neither agree nor disagree" and "Strongly disagree" receive 1, 3 and 5
# from the generic label pass later in the script.
full_data <- recode_labels(full_data, "CC359", c(
  "Somewhat agree" = 2,
  "Somewhat disagree" = 4
))

# Turnout items: anything other than a definite "yes" counts as not voting.
full_data <- recode_labels(full_data, "CC330", c(
  "No" = 0,
  "I usually vote, but did not in 2008" = 0,
  "Yes. I definitely voted." = 1
))

full_data <- recode_labels(full_data, "CC332", c(
  "No" = 0,
  "I usually vote, but did not in 2010" = 0,
  "Yes. I definitely voted." = 1
))

full_data <- recode_labels(full_data, "CC361", c(
  "Fairly" = 0,
  "Unfairly" = 1
))

full_data <- recode_labels(full_data, "CC363", c(
  "The courts should uphold the Voting Rights Act." = 0,
  "The courts should NOT uphold the Act." = 1
))

# Generic label pass: recode answer labels wherever they occur ----
#
# Every cell in any character column that exactly matches one of the labels
# below is replaced by the label's code (stored as text; numeric conversion
# happens later in the script). Cells already recoded by the question-specific
# steps above hold digit strings by now and therefore match nothing here.
#
# This replaces a row-by-row loop that compared each row against every label
# (and printed the row number every 100 rows). The result is the same, but
# each column is now recoded with a single lookup.
label_codes <- c(
  "Strongly agree" = 1,
  "Agree" = 2,
  "Neither agree nor disagree" = 3,
  "Disagree" = 4,
  "Strongly disagree" = 5,
  "Skipped" = 8,
  "Not Asked" = 9,

  "No" = 0,
  "Yes" = 1,

  "Believe in" = 3,
  "Not sure about" = 2,
  "Don't believe in" = 1,

  "Not at all" = 1,
  "Moderately" = 2,
  "Quite a bit" = 3,
  "Extremely" = 4,

  "Not at all relevant" = 1,
  "Not very relevant" = 2,
  "Slightly relevant" = 3,
  "Somewhat relevant" = 4,
  "Very relevant" = 5,
  "Extremely Relevant" = 6,

  "Not confident at all" = 1,
  "A little confident" = 2,
  "Somewhat confident" = 3,
  "Fairly confident" = 4,
  "Very confident" = 5,
  "Certain" = 6,

  "Strongly Approve" = 1,
  "Somewhat Approve" = 2,
  "Somewhat Disapprove" = 3,
  "Strongly Disapprove" = 4,

  "Support" = 1,
  "Oppose" = 0,

  "Very Liberal" = 1,
  "Liberal" = 2,
  "Somewhat Liberal" = 3,
  "Middle of the Road" = 4,
  "Somewhat Conservative" = 5,
  "Conservative" = 6,
  "Very Conservative" = 7,

  "Has a very conservative bias" = 1,
  "Has a conservative bias" = 2,
  "Has a somewhat conservative bias" = 3,
  "Is unbiased" = 4,
  "Has a somewhat liberal bias" = 5,
  "Has a liberal bias" = 6,
  "Has a very liberal bias" = 7,

  "Probably Against" = 0,
  "Probably For" = 1,

  "None of the time" = 1,
  "Some of the time" = 2,
  "Most of the time" = 3,
  "Just about always" = 4,

  "Independence" = 0,
  "Respect For Elders" = 1,

  "Obedience" = 0,
  "Self-Reliance" = 1,

  "Curiosity" = 0,
  "Good Manners" = 1,

  "Male" = 0,
  "Female" = 1,

  "Own" = 0,
  "Rent" = 1,

  "I know what policies should be chosen" = 1,
  "It depends" = 2,
  "I'm happier to let someone similar to me choose" = 3,

  "Less than 1 month" = 1,
  "1-6 months" = 2,
  "7-11 months" = 3,
  "1-2 years" = 4,
  "3-4 years" = 5,
  "5-10 years" = 6,
  "More than 10 years" = 7,

  "Gotten much better" = 1,
  "Gotten better" = 2,
  "Stayed about the same" = 3,
  "Gotten worse" = 4,
  "Gotten much worse" = 5
)

# Non-character columns cannot contain a label, so they are left alone.
for (column_position in which(vapply(full_data, is.character, logical(1)))) {
  answers <- full_data[[column_position]]
  position <- match(answers, names(label_codes))
  matched <- !is.na(position)
  answers[matched] <- unname(label_codes)[position[matched]]
  full_data[[column_position]] <- answers
}
# Snapshot of the recoded data; uncomment the next line to restore it.
full_data_temp.storage <- full_data
#full_data <- full_data_temp.storage

# Clarify codings: give selected items a name that says what their code
# means. Each entry maps an original column name to its new name; entries
# whose original name is not present in the data are ignored.
recoded_item_names <- c(
  "CHI456" = "respect_for_elders",
  "CHI457" = "self_reliance",
  "CHI458" = "good_manners",
  "V208" = "female",
  "CC306" = "rent",
  "CC361" = "redistrict_unfairly",
  "CC363" = "reject_VRA",
  "randrev_CC343" = "TeaParty_supporter_opponent",
  "CC322a" = "heard_of_governor",
  "CC322b" = "heard_of_senator1",
  "CC322c" = "heard_of_senator2",
  "CC322d" = "heard_of_houserep",
  "CC321a" = "party_know_hreps",
  "CC321b" = "party_know_senate",
  "CC321c" = "party_know_st.senate",
  "CC321d" = "party_know_lower.chamber"
)
recoded_item_hit <- match(colnames(full_data), names(recoded_item_names))
recoded_item_found <- !is.na(recoded_item_hit)
colnames(full_data)[recoded_item_found] <-
  unname(recoded_item_names)[recoded_item_hit[recoded_item_found]]

#final cleanings
# Split the data into categorical columns, which are expanded into one 0/1
# indicator column per level with ade4::acm.disjonctif(), and all remaining
# columns, which are converted to numeric. Duplicate entries in the list are
# harmless because membership is tested with %in%.
to_dummy_index <- unique(c("V209", "CC301a", "education", "V211",
                           "V206", "V282", "V209", "CC301a", "CC302",
                           "CC308", "CC309b", "CC309c", "CC331", "CC333",
                           "CC333", "CC335b",
                           "CC340a", "CC340b", "CC340c", "CC340d", "CC343", "CC355a",
                           "CC355b", "V212c", "V212a", "V263", "V264", "V265",
                           "V219", "V220", "V222", "V223", "V224", "V225",
                           "V226", "V227", "V228", "V229", "V230", "V231",
                           "V232", "V233", "V234", "V235", "V236", "V237",
                           "V238", "V239", "V240", "V214", "V273", "V274",
                           "V275", "CC334", "heard_of_governor", "heard_of_senator1",
                           "heard_of_senator2", "heard_of_houserep",
                           "party_know_hreps", "party_know_senate", "party_know_st.senate",
                           "party_know_lower.chamber"))

# A logical mask is used for the split: negative indexing with an empty
# `factor_index` would select no columns at all instead of all of them.
is_categorical <- colnames(full_data) %in% to_dummy_index
factor_index <- which(is_categorical)
df_numeric1 <- full_data[, !is_categorical, drop = FALSE]

# Convert column by column. The previous apply(df, 2, as.numeric) first turned
# the whole data frame into a character matrix, which formats numeric columns
# to 7 significant digits and so silently rounded weights and ids. Text that
# is not a number still becomes NA (with the usual coercion warning); logical
# columns now become 0/1 rather than NA. The result is a numeric matrix, as
# before.
df_numeric <- vapply(df_numeric1, as.numeric, numeric(nrow(df_numeric1)))

df_factor <- full_data[, is_categorical, drop = FALSE]
df_factor <- acm.disjonctif(df_factor)

full_data <- cbind(df_numeric, df_factor)

# Replace questionnaire codes by descriptive column names ----
#
# Each entry maps an original column name to its new name. Entries whose
# original name is not a column of `full_data` at this point are ignored.
#
# NOTE(review): "V210" is removed when the data are loaded, and "CC334" is in
# the list of columns expanded into indicator columns, so those two entries
# probably never match -- confirm.
# NOTE(review): "CC331F" breaks the CC342A..CC342I sequence and looks like a
# typo for "CC342F"; kept as in the original so output names do not change.
# NOTE(review): V216 is recoded earlier from "Very important" ... labels and
# V217 from "More than once a week" ... labels, which does not fit the names
# "born_again" / "importance_religion" given to them here -- confirm.
descriptive_names <- c(
  "V100" = "panelist_id",
  "V101" = "team_weights",

  "CHI401_413_1" = "local_tv",
  "CHI401_413_2" = "national_tv",
  "CHI401_413_3" = "cnn",
  "CHI401_413_4" = "msnbc",
  "CHI401_413_5" = "fox",
  "CHI401_413_6" = "pbs",
  "CHI401_413_7" = "national_paper",
  "CHI401_413_8" = "local_paper",
  "CHI401_413_9" = "npr",
  "CHI401_413_10" = "talk_radio",
  "CHI401_413_11" = "political_websites",
  "CHI401_413_12" = "news_websites",
  "CHI401_413_13" = "blog",

  "CHI416" = "consp1_cabal",
  "CHI417" = "consp1_more_informed",
  "CHI418" = "consp1_don_t_have_much_to_say",
  "CHI419" = "consp1_don_t_care_what_think",
  "CHI420" = "consp1_well_qualified",
  "CHI421" = "consp1_govt_too_complicated",

  "CHI430" = "belief.grid_ghosts",
  "CHI431" = "belief.grid_god",
  "CHI432" = "belief.grid_angels",
  "CHI433" = "belief.grid_devil",
  "CHI434" = "belief.grid_evolution",

  "CHI435" = "attitude2_good_v_evil",
  "CHI436" = "attitude2_compassion",
  "CHI437" = "attitude2_fairness",
  "CHI438" = "attitude2_loyalty",
  "CHI439" = "attitude2_gender_roles",
  "CHI440" = "attitude2_disgusting",

  "CHI442" = "endtimes",
  "CHI442b" = "endtimes_followup",

  "CHI450" = "trust_washington",
  "CHI451" = "trust_general",

  "CHI452" = "take_advantage",

  "CHI453" = "attitude3_esp",
  "CHI454" = "attitude3_horoscopes",
  "CHI455" = "attitude3_intelligent_design",

  "CAL301" = "year_invent_tele",
  "CAL302" = "tele_invent_conf",
  "CAL303" = "year_invent_perc_conf",
  "CAL304" = "spain_pop",
  "CAL305" = "spain_pop_conf",
  "CAL306" = "spain_pop_perc_conf",
  "CAL307" = "year_shakespeare_born",
  "CAL308" = "year_shakespeare_born_conf",
  "CAL309" = "year_shakespeare_born_perc_conf",
  "CAL310" = "cali_pop",
  "CAL311" = "cali_pop_conf",
  "CAL312" = "cali_pop_perc_conf",
  "CAL321" = "media_bias",
  "CAL322" = "govt_policy",

  "V207" = "birth_year",
  "V290" = "latino_descent",
  "V210" = "cces_testing_module",
  "V201" = "zipcode",
  "V203" = "voter_reg_status",
  "V202" = "voter_reg_zip",
  "V204" = "register_zip_confirm",
  "V205" = "Voter_reg_zip_northdakota",
  "V204_nd" = "voter_reg_status_northdakota",

  "CC300" = "had_job",
  "CC307" = "length_of_residence",
  "CC309_1" = "media_use_blog",
  "CC309_2" = "media_use_tv",
  "CC309_3" = "media_use_newspaper",
  "CC309_4" = "media_use_radio",
  "CC309_5" = "media_use_none",

  "CC310" = "national_economy",
  "CC311" = "unemployment_guess",
  "CC313" = "iraq_mistake",
  "CC314" = "afghanistan_mistake",

  "CC320a" = "job_approval_obama",
  "CC320b" = "job_approval_house",
  "CC320c" = "job_approval_senate",
  "CC320f" = "job_approval_governor",
  "CC320g" = "job_approval_state_leg",

  "CC323a" = "approve_current_house_rep",
  "CC323b" = "approve_current_senator1",
  "CC323c" = "approve_current_senator2",

  "CC330" = "voted_08",
  "CC332" = "voted_10",
  "CC334" = "representative_race",
  "CC335" = "r_r_for_office",
  "CC336" = "win_office",
  "CC337" = "campaign_spending",

  "CC340a_force" = "against_raise_debt_ceiling_self",
  "CC340b_force" = "against_raise_debt_ceiling_rep",
  "CC340c_force" = "against_raise_debt_ceiling_sen1",
  "CC340d_force" = "against_raise_debt_ceiling_sen2",

  "CC341A" = "roll_call_recovery",
  "CC341B" = "roll_call_childrens_health",
  "CC341C" = "roll_call_clean_energy",
  "CC341D" = "roll_call_health_reform",
  "CC341E" = "roll_call_dadt",
  "CC341F" = "roll_call_dadt2",
  "CC341G" = "roll_call_stemcell",
  "CC341H" = "roll_call_tarp",

  "CC342A" = "ideo_grid_self",
  "CC342B" = "ideo_grid_gov",
  "CC342C" = "ideo_grid_obama",
  "CC342D" = "ideo_grid_dem",
  "CC342E" = "ideo_grid_repub",
  "CC342M" = "ideo_grid_teap",
  "CC331F" = "ideo_grid_sen1",
  "CC342G" = "ideo_grid_sen2",
  "CC342H" = "ideo_grid_house_rep",
  "CC342I" = "ideo_grid_supreme_court",

  "CC350" = "climate",
  "CC351_1" = "immigration_amnesty",
  "CC351_2" = "immigration_increase_border",
  "CC351_3" = "immigration_police_q",
  "CC351_4" = "immigration_none",

  "CC352" = "abortion",
  "CC353" = "gay_marriage",
  "CC354" = "affirmative_action",

  "CC356_1" = "ensure_oil",
  "CC356_2" = "destroy_terrorist",
  "CC356_3" = "intervene_war",
  "CC356_4" = "assist_demo",
  "CC356_5" = "protect_am_allies",
  "CC356_6" = "help_UN",
  "CC356_7" = "no_intervene_actions",

  "CC357" = "rase_taxes_v_spending",
  "CC358" = "income_v_sales_tax",
  "CC359" = "racial_resentment1",
  "V212d" = "7_point_partyID",
  "V243" = "ideology",
  "V252" = "military_iam",
  "V253" = "military_family",
  "V254" = "military_iserved",
  "V255" = "military_familyserved",
  "V256" = "military_none",

  "V258" = "years.in.current_city",
  "V259" = "months.in.current_city",
  "V261" = "years_current_address",
  "V262" = "months_current_address",
  "V241" = "number_children_under18",
  "V242" = "r_child_under18",

  "V216" = "born_again",
  "V217" = "importance_religion",
  "V218" = "prayer_frequency",

  "V244" = "news_interest",
  "V246" = "family_income",
  "V247" = "family_income_high_brackets",
  "V266" = "stock_ownership",

  "V267" = "health_insur_employer",
  "V268" = "health_insur_medicare",
  "V269" = "health_insur_school",
  "V270" = "health_insur_purchased",
  "V271" = "health_insur_not_sure",
  "V272" = "health_insur_no",

  "CC370_1" = "during.survey_chore",
  "CC370_2" = "during.survey_call",
  "CC370_3" = "during.survey_text",
  "CC370_4" = "during.survey_email",
  "CC370_5" = "during.survey_break",
  "CC370_6" = "during.survey_browse",
  "CC370_7" = "during.survey_dealwchild",
  "CC370_8" = "during.survey_talk_adult",
  "CC370_9" = "during.survey_watch.tv",
  "CC370_10" = "during.survey_none",

  "V278" = "age",
  "V302" = "state_fips"
)

# Look every current column name up in the table and rename the hits.
descriptive_hit <- match(colnames(full_data), names(descriptive_names))
descriptive_found <- !is.na(descriptive_hit)
colnames(full_data)[descriptive_found] <-
  unname(descriptive_names)[descriptive_hit[descriptive_found]]

######
# Save the fully recoded data, then build the reduced analysis data set.
full_data_save <- full_data
write.csv(full_data_save, "chicago_module_full.csv")

#Remove columns with extreme missingness or no variation
# Re-reading the file adds the row names written by write.csv() as a first
# column and makes all column names syntactic.
full_data <- read.csv("chicago_module_full.csv", header = TRUE, stringsAsFactors = FALSE)

# Keep columns with fewer than 400 missing values.
full_data_numeric <- full_data[, as.vector(colSums(is.na(full_data))) < 400]

# Every column drop below uses a logical mask. The earlier
# `data[, -which(condition)]` form selects zero columns when nothing matches
# the condition, which would silently empty the data set.
drop_by_name <- colnames(full_data_numeric) %in% c("V206.North Dakota", "V206.Vermont", "CC334.Other",
                                                   "V219.Eastern or Greek Orthodox", "V227.Assembly of Christian Churches",
                                                   "V227.Pentecostal Holiness Church", "V232.Other Reformed",
                                                   "V236.Russian Orthodox", "V240.Vaishnava Hinduism",
                                                   "V211.Middle.Eastern", "V206.North.Dakota", "V219.Eastern.or.Greek.Orthodox")
remove_index <- which(drop_by_name)
full_data_numeric <- full_data_numeric[, !drop_by_name, drop = FALSE]
#colnames(full_data)[which(colSums(is.na(full_data))>500)]#these have been checked and are legit

# Remove all columns that have no variance. A column whose variance is NA
# (fewer than two observed values) is kept, as before.
column_variance <- apply(full_data_numeric, 2, var, na.rm = TRUE)
no_variance <- !is.na(column_variance) & column_variance == 0
full_data_numeric <- full_data_numeric[, !no_variance, drop = FALSE]

# Remove columns whose values sum to less than 10. The sum is taken without
# removing NA, so a column containing any missing value has an NA sum and is
# kept, as before.
column_total <- colSums(full_data_numeric)
too_sparse <- !is.na(column_total) & column_total < 10
full_data_numeric <- full_data_numeric[, !too_sparse, drop = FALSE]

full_data_numeric <- full_data_numeric[,-c(1:4)]#delete front matter
# NOTE(review): the positions below are hard-coded and depend on the exact
# column order produced by the steps above -- confirm after any change.
full_data_numeric <- full_data_numeric[,-c(188:217, 356:381)]

write.csv(full_data_numeric, "chicago_module_reduced.csv")
#END data cleaning
#END data cleaning 

#BEGIN Miscellaneous analysis
# Imputed data set for initial explorations: columns 1-100 and 101-150 are
# imputed with Amelia in two separate runs (one imputation each); columns
# 151 onwards are carried over without imputation.
full_data_numeric <- read.csv("chicago_module_reduced.csv")
full_data_numeric_pre_imputation <- full_data_numeric
#linearity_test <- my_get.linearly.independent(x_predictors_train)
#x_predictors_train_rem_perfect_collinear <- linearity_test$independent.markers
amelia_first_block <- amelia(full_data_numeric[, 1:100], m = 1) #incheck=FALSE)
imputed_dataseta <- data.frame(amelia_first_block$imputations)
amelia_second_block <- amelia(full_data_numeric[, 101:150], m = 1)
imputed_datasetb <- data.frame(amelia_second_block$imputations)
not_imputed <- full_data_numeric[, 151:ncol(full_data_numeric)]
imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, not_imputed)
full_data_numeric <- imputed_dataset_full
# data.frame() prefixes the imputed columns with the imputation's name, so the
# original column names are restored here.
colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)
#write.csv(full_data_numeric, "chicago_module_reduced_imputed.csv")

#Miscellaneous data cleaning 
index_conspiracy_familiarity <- which(colnames(full_data_numeric) %in% c("truther", "birther", "fin.crisis", "CFLB", 
                                                                         "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(colnames(full_data_numeric) %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b", 
                                                                      "soros_b", "iraq_jew_b", "chemtrail_b"))
index_other_endogeneous_variables <- which(colnames(full_data_numeric) %in% c())

set.seed(01244); full_data_numeric <- full_data_numeric[sample(nrow(full_data_numeric), replace=F),] ##Shuffle the Data
TrainingSet <- full_data_numeric[c(1:floor(2/3*nrow(full_data_numeric))),]
TestSet <- full_data_numeric[c(ceiling(2/3*nrow(full_data_numeric)):(nrow(full_data_numeric))),]

y_conspiracy_familiarity_train <- data.matrix(TrainingSet[,index_conspiracy_familiarity])
y_conspiracy_opinions_train <- data.matrix(TrainingSet[,index_conspiracy_opinions])
x_predictors_train_no_endogeneous <- data.matrix(TrainingSet[,-c(index_conspiracy_familiarity,
                                                                 index_conspiracy_opinions, 
                                                                 index_other_endogeneous_variables)])[,-c(1)]
x_predictors_train_all_predict.familiar <- data.matrix(TrainingSet[,-c(index_conspiracy_familiarity)])[,-1]
x_predictors_train_all_predict.opinion <- data.matrix(TrainingSet[,-c(index_conspiracy_opinions)])[,-1]

#Matching
# Reload the reduced data and impute it afresh for the matching analysis.
full_data_numeric <- read.csv("chicago_module_reduced.csv")
full_data_numeric_pre_imputation <- full_data_numeric
# Columns 1-100 and 101-150 are imputed separately; 151 onward are unimputed.
imputed_dataseta <- data.frame(amelia(full_data_numeric[, 1:100], m = 1)$imputations) #incheck=FALSE)
imputed_datasetb <- data.frame(amelia(full_data_numeric[, 101:150], m = 1)$imputations)
imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, 
                              full_data_numeric[, 151:ncol(full_data_numeric)])
full_data_numeric <- imputed_dataset_full 
# data.frame() prefixes the imputed columns; restore the original names.
colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)

# Positions come back in data-column order, not in the order listed here.
index_conspiracy_familiarity <- which(colnames(full_data_numeric) %in% c("truther", "birther", "fin.crisis", "CFLB", 
                                                                         "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(colnames(full_data_numeric) %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b", 
                                                                      "soros_b", "iraq_jew_b", "chemtrail_b"))
index_other_endogeneous_variables <- which(colnames(full_data_numeric) %in% c())

##Shuffle the Data
set.seed(01244)
full_data_numeric <- full_data_numeric[sample(nrow(full_data_numeric), replace = FALSE), ]

# 2/3 train, 1/3 test. The test set starts at n_train + 1; the earlier
# floor()/ceiling() pair put the boundary row in both sets whenever
# 2/3 * nrow was a whole number.
n_rows <- nrow(full_data_numeric)
n_train <- floor(2 / 3 * n_rows)
TrainingSet <- full_data_numeric[seq_len(n_train), ]
TestSet <- full_data_numeric[n_train + seq_len(n_rows - n_train), ]

y_conspiracy_familiarity_train <- data.matrix(TrainingSet[, index_conspiracy_familiarity])
y_conspiracy_opinions_train <- data.matrix(TrainingSet[, index_conspiracy_opinions])
# The trailing [, -1] drops the first remaining column (the row-name column
# that write.csv()/read.csv() round-trips as column 1).
x_predictors_train_no_endogeneous <- data.matrix(TrainingSet[, -c(index_conspiracy_familiarity,
                                                                  index_conspiracy_opinions, 
                                                                  index_other_endogeneous_variables)])[, -c(1)]

# Matching-frontier plots: one (frontier, estimates) pair of panels per
# conspiracy theory, written to a single 7 x 2 PDF page.
#
# For theory k the treatment is column k of the familiarity matrix and the
# outcome is column k of the opinion matrix.
# NOTE(review): this assumes the data columns appear in the order listed in
# matching_theories -- confirm against the column order of the CSV.
matching_theories <- c("truther", "birther", "fin.crisis", "CFLB",
                       "soros", "iraq_jew", "chemtrail")

# Covariates treated as continuous for every theory; also the match.on set
# used for truther.
matching_covariates_base <- c("endtimes", "consp1_more_informed",
                              "belief.grid_ghosts", "liberal", "age")
# match.on set used for every theory except truther.
matching_covariates_god <- c("endtimes", "consp1_more_informed",
                             "belief.grid_ghosts", "belief.grid_god",
                             "liberal", "age")
# Right-hand side shared by all model-dependence formulas ("liberal" appears
# twice, and belief.grid_god is not included, exactly as before).
mod_dependence_rhs <- "+endtimes+consp1_more_informed+liberal+belief.grid_ghosts+liberal+age"

pdf("plot_matching_start.pdf", width = 10, height = 20)
par(mfrow = c(7, 2))
for (theory_position in seq_along(matching_theories)) {
  theory_name <- matching_theories[theory_position]
  belief_name <- paste0(theory_name, "_b")

  # Treatment, outcome, then every non-endogenous predictor.
  theory_data <- as.data.frame(cbind(y_conspiracy_familiarity_train[, theory_position],
                                     y_conspiracy_opinions_train[, theory_position],
                                     x_predictors_train_no_endogeneous))
  colnames(theory_data)[1:2] <- c(theory_name, belief_name)
  assign(paste0("data_", theory_name), theory_data)

  theory_match_on <- if (theory_name == "truther") {
    matching_covariates_base
  } else {
    matching_covariates_god
  }
  theory_frontier <- makeFrontier(dataset = as.data.frame(theory_data),
                                  treatment = theory_name,
                                  outcome = belief_name,
                                  match.on = theory_match_on)
  assign(paste0("our.frontier_", theory_name), theory_frontier)
  plotFrontier(theory_frontier, main = theory_name)

  theory_effects <- estimateEffects(theory_frontier,
                                    formula = paste0(belief_name, "~", theory_name),
                                    mod.dependence.formula = as.formula(paste0(belief_name, "~", theory_name,
                                                                               mod_dependence_rhs)),
                                    continuous.vars = matching_covariates_base)
  assign(paste0("effects_", theory_name), theory_effects)
  plotEstimates(theory_effects, main = theory_name)
}
dev.off()

#Explorations into the proportions  
# Reload the reduced data and impute it afresh for the descriptive summaries.
full_data_numeric <- read.csv("chicago_module_reduced.csv")
full_data_numeric_pre_imputation <- full_data_numeric
# Columns 1-100 and 101-150 are imputed separately; 151 onward are unimputed.
imputed_dataseta <- data.frame(amelia(full_data_numeric[, 1:100], m = 1)$imputations) #incheck=FALSE)
imputed_datasetb <- data.frame(amelia(full_data_numeric[, 101:150], m = 1)$imputations)
imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, 
                              full_data_numeric[, 151:ncol(full_data_numeric)])
full_data_numeric <- imputed_dataset_full 
# data.frame() prefixes the imputed columns; restore the original names.
colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)
index_conspiracy_familiarity <- which(colnames(full_data_numeric) %in% c("truther", "birther", "fin.crisis", "CFLB", 
                                                                         "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(colnames(full_data_numeric) %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b", 
                                                                      "soros_b", "iraq_jew_b", "chemtrail_b"))
index_other_endogeneous_variables <- which(colnames(full_data_numeric) %in% c())

full_conspiracy_familarity_FULL <- full_data_numeric[, index_conspiracy_familiarity]
full_conspiracy_opinions_FULL <- full_data_numeric[, index_conspiracy_opinions]

# Share of respondents familiar with at least one theory, leaving out the 4th
# familiarity column.
# NOTE(review): the 4th column is CFLB only if the data columns follow the
# order listed above -- confirm.
rowsums <- rowSums(full_data_numeric[, index_conspiracy_familiarity[-4]])
mean(rowsums > 0)

mean(full_data_numeric$birther)

# Share of respondents coded exactly 1 on each familiarity item.
mean_truther <- mean(full_data_numeric$truther == 1)
mean_birther <- mean(full_data_numeric$birther == 1)
mean_fin.crisis <- mean(full_data_numeric$fin.crisis == 1)
mean_CFLB <- mean(full_data_numeric$CFLB == 1)
mean_soros <- mean(full_data_numeric$soros == 1)
mean_iraq_jew <- mean(full_data_numeric$iraq_jew == 1)
mean_chemtrail <- mean(full_data_numeric$chemtrail == 1)
means_familarity <- cbind(mean_truther, mean_birther, mean_fin.crisis, 
                          mean_CFLB, mean_soros, mean_iraq_jew, 
                          mean_chemtrail)
print(means_familarity)

# Chemtrail x CFLB cross-tabulation. Cells 1-3 are joint shares; cell 4 is the
# share with chemtrail == 1 among those with CFLB == 1 (a conditional share).
mean_chemtrail_and_CFLB_1 <- mean(full_data_numeric$chemtrail == 0 & full_data_numeric$CFLB == 0)
mean_chemtrail_and_CFLB_2 <- mean(full_data_numeric$chemtrail == 0 & full_data_numeric$CFLB == 1)
mean_chemtrail_and_CFLB_3 <- mean(full_data_numeric$chemtrail == 1 & full_data_numeric$CFLB == 0)
mean_chemtrail_and_CFLB_4 <- mean(full_data_numeric$chemtrail == 1 & full_data_numeric$CFLB == 1) / 
  mean(full_data_numeric$CFLB == 1)
chem_CFLB <- cbind(mean_chemtrail_and_CFLB_1, mean_chemtrail_and_CFLB_2, 
                   mean_chemtrail_and_CFLB_3, mean_chemtrail_and_CFLB_4)
print(chem_CFLB)

# Distribution of the per-respondent familiarity total (0 through 7).
familiarity_total <- rowSums(full_conspiracy_familarity_FULL)
mean_0 <- mean(familiarity_total == 0)
mean_1 <- mean(familiarity_total == 1)
mean_2 <- mean(familiarity_total == 2)
mean_3 <- mean(familiarity_total == 3)
mean_4 <- mean(familiarity_total == 4)
mean_5 <- mean(familiarity_total == 5)
mean_6 <- mean(familiarity_total == 6)
mean_7 <- mean(familiarity_total == 7)
mean_familarity_counts <- cbind(mean_0, mean_1, mean_2, mean_3, mean_4, mean_5, mean_6, mean_7)
# Previously print(means), which referred to an object that is never defined.
print(mean_familarity_counts)

# Correlations among all familiarity and opinion items.
correlation_matrix_df <- as.data.frame(cbind(full_conspiracy_familarity_FULL, full_conspiracy_opinions_FULL))
correlation_matrix <- cor(correlation_matrix_df)
sort(unlist(correlation_matrix))
min(correlation_matrix)

# Column means of the opinion items; divides by the actual row count rather
# than a hard-coded 1000.
mean_belief <- colSums(full_conspiracy_opinions_FULL) / nrow(full_conspiracy_opinions_FULL)

# Share of all respondents with opinion score <= 1 and familiarity == 1.
mean(full_conspiracy_opinions_FULL$truther_b <= 1 & full_conspiracy_familarity_FULL$truther == 1)
mean(full_conspiracy_opinions_FULL$birther_b <= 1 & full_conspiracy_familarity_FULL$birther == 1)
mean(full_conspiracy_opinions_FULL$fin.crisis_b <= 1 & full_conspiracy_familarity_FULL$fin.crisis == 1)
mean(full_conspiracy_opinions_FULL$CFLB_b <= 1 & full_conspiracy_familarity_FULL$CFLB == 1)
mean(full_conspiracy_opinions_FULL$soros_b <= 1 & full_conspiracy_familarity_FULL$soros == 1)
mean(full_conspiracy_opinions_FULL$iraq_jew_b <= 1 & full_conspiracy_familarity_FULL$iraq_jew == 1)
mean(full_conspiracy_opinions_FULL$chemtrail_b <= 1 & full_conspiracy_familarity_FULL$chemtrail == 1)
#END miscellaneous analysis 

##BEGIN Logits 
# Reload the reduced data, keep an unimputed copy for its column names, and
# impute columns 1-100 and 101-150 in two separate Amelia runs.
full_data_numeric_pre_imputation <- read.csv("chicago_module_reduced.csv")
full_data_numeric <- full_data_numeric_pre_imputation
imputed_dataseta <- data.frame(amelia(full_data_numeric[, 1:100], m = 1)$imputation) #incheck=FALSE)
imputed_datasetb <- data.frame(amelia(full_data_numeric[, 101:150], m = 1)$imputation)
unimputed_tail <- full_data_numeric[, 151:ncol(full_data_numeric)]
imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, unimputed_tail)
full_data_numeric <- setNames(imputed_dataset_full, colnames(full_data_numeric_pre_imputation))

logit_column_names <- colnames(full_data_numeric)
index_conspiracy_familiarity <- which(logit_column_names %in% c("truther", "birther", "fin.crisis", "CFLB",
                                                                "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(logit_column_names %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b",
                                                             "soros_b", "iraq_jew_b", "chemtrail_b"))
index_other_endogeneous_variables <- which(logit_column_names %in% c())

# Belief-grid items are picked by their position among the columns whose name
# matches "belief.grid": 1st = ghosts, 3rd = angels, 4th = devil.
# NOTE(review): positional -- confirm against the actual column order.
belief_grid_columns <- grep("belief.grid", logit_column_names)
ghosts <- full_data_numeric[belief_grid_columns[1]]
angels <- full_data_numeric[belief_grid_columns[3]]
devil <- full_data_numeric[belief_grid_columns[4]]
esp <- full_data_numeric$attitude3_esp

# One-column data frames holding the composite / renamed predictors.
paranormal <- setNames(as.data.frame(ghosts + esp), "paranormal")
supernatural <- setNames(as.data.frame(angels + devil), "supernatural")
endtimes <- setNames(as.data.frame(full_data_numeric$endtimes_followup), "endtimes")
cabal <- setNames(as.data.frame(full_data_numeric$consp1_cabal), "cabal")
manichean <- setNames(as.data.frame(full_data_numeric$attitude2_good_v_evil), "manichean")

full_data_numeric <- cbind(full_data_numeric, paranormal, supernatural, endtimes, cabal, manichean)

#("truther", "birther", "fin.crisis", "CFLB", 
# "soros", "iraq_jew", "chemtrail"))

#Logits
# Shared predictor matrix; college_grad is the sum of the post-grad and
# 4-year education columns.
college_grad <- c(full_data_numeric$education.Post.grad + full_data_numeric$education.4.year)
x_vars <- cbind(full_data_numeric$age, full_data_numeric$female,
                full_data_numeric$black, full_data_numeric$evangelical,
                full_data_numeric$liberal, full_data_numeric$conservative,
                full_data_numeric$trust, college_grad,
                paranormal, supernatural, endtimes, cabal, manichean)
colnames(x_vars) <- c("age", "female", "black", "evangelical", "liberal", "conservative", "trust",
                      "college_grad", "paranormal", "supernatural", "endtimes", "cabal", "manichean")
x_vars <- as.matrix(x_vars)

# One binomial logit of familiarity on the shared predictor matrix per theory.
truther_logit <- glm(truther ~ x_vars, family = "binomial", data = full_data_numeric) #significant
birther_logit <- glm(birther ~ x_vars, family = "binomial", data = full_data_numeric)
fin.crisis_logit <- glm(fin.crisis ~ x_vars, family = "binomial", data = full_data_numeric)
CFLB_logit <- glm(CFLB ~ x_vars, family = "binomial", data = full_data_numeric)
soros_logit <- glm(soros ~ x_vars, family = "binomial", data = full_data_numeric)
iraq_jew_logit <- glm(iraq_jew ~ x_vars, family = "binomial", data = full_data_numeric)
chemtrail_logit <- glm(chemtrail ~ x_vars, family = "binomial", data = full_data_numeric)

# The chemtrail model is refit without the last four predictor columns; this
# replaces the full model fitted just above.
chemtrail_logit <- glm(chemtrail ~ x_vars[, 1:(ncol(x_vars) - 4)], family = "binomial", data = full_data_numeric)
summary(chemtrail_logit)

# NOTE(review): soros_logit is fitted above but is not passed to stargazer()
# -- confirm that the omission is intended.
stargazer(truther_logit, birther_logit, fin.crisis_logit, CFLB_logit, iraq_jew_logit, chemtrail_logit)
#END logits 

#BEGIN standard Lasso 
full_data_numeric <- read.csv("chicago_module_reduced.csv")
full_data_numeric_pre_imputation <- full_data_numeric
# One fixed row shuffle shared by every iteration of the loop below. This line
# had been commented out, which left SCRAMBLE_INDICES undefined when the loop
# used it.
set.seed(01244); SCRAMBLE_INDICES <- sample(nrow(full_data_numeric))

#creating the indices 
# Positions come back in data-column order, not in the order listed here.
index_conspiracy_familiarity <- which(colnames(full_data_numeric) %in% c("truther", "birther", "fin.crisis", "CFLB", 
                                                                         "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(colnames(full_data_numeric) %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b", 
                                                                      "soros_b", "iraq_jew_b", "chemtrail_b"))
index_other_endogeneous_variables <- which(colnames(full_data_numeric) %in% c())

#algorithm - predicting familiarity WITH opinion vars 
#####
# Theory k is modelled from column k of the familiarity matrix.
# NOTE(review): assumes the data columns follow this order -- confirm.
lasso_theories <- c("truther", "birther", "fin.crisis", "CFLB",
                    "soros", "iraq_jew", "chemtrail")

# Which cross-validated lambda decides whether a predictor counts as selected.
# NOTE(review): fin.crisis alone used lambda.min in the original code; kept
# as-is -- confirm this is deliberate and not a copy-paste slip.
lasso_selection_rule <- c(truther = "lambda.1se", birther = "lambda.1se",
                          fin.crisis = "lambda.min", CFLB = "lambda.1se",
                          soros = "lambda.1se", iraq_jew = "lambda.1se",
                          chemtrail = "lambda.1se")

lasso_empty_slots <- setNames(vector("list", length(lasso_theories)), lasso_theories)
lasso_selected <- lasso_empty_slots       # selected column positions, pooled over iterations
lasso_last_fit <- lasso_empty_slots       # cv.glmnet fit from the most recent iteration
lasso_last_coef_min <- lasso_empty_slots  # coefficients at lambda.min
lasso_last_coef_1se <- lasso_empty_slots  # coefficients at lambda.1se
lasso_last_locator <- lasso_empty_slots   # non-zero coefficient positions (intercept included)

n_iterations <- 100
for (i in seq_len(n_iterations)) {
  print(i)
  # Start every iteration from the unimputed data read above (the file itself
  # does not change inside the loop, so it is not re-read).
  full_data_numeric <- full_data_numeric_pre_imputation

  #imputation at each iteration 
  imputed_dataseta <- data.frame(amelia(full_data_numeric[, 1:100], m = 1)$imputations) #incheck=FALSE)
  imputed_datasetb <- data.frame(amelia(full_data_numeric[, 101:150], m = 1)$imputations)
  imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, 
                                full_data_numeric[, 151:ncol(full_data_numeric)])
  full_data_numeric <- imputed_dataset_full 
  colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)

  #setting up the data 
  full_data_numeric <- full_data_numeric[SCRAMBLE_INDICES, ] ##Shuffle the Data
  # 2/3 train, 1/3 test; the test set starts at n_train + 1 so the boundary
  # row can never appear in both sets.
  n_rows <- nrow(full_data_numeric)
  n_train <- floor(2 / 3 * n_rows)
  TrainingSet <- full_data_numeric[seq_len(n_train), ]
  TestSet <- full_data_numeric[n_train + seq_len(n_rows - n_train), ]

  y_conspiracy_familiarity_train <- data.matrix(TrainingSet[, index_conspiracy_familiarity])
  y_conspiracy_opinions_train <- data.matrix(TrainingSet[, index_conspiracy_opinions])
  x_predictors_train_no_endogeneous <- data.matrix(TrainingSet[, -c(index_conspiracy_familiarity,
                                                                    index_conspiracy_opinions, 
                                                                    index_other_endogeneous_variables)])[, -c(1)]
  x_predictors_train_all_predict.familiar <- data.matrix(TrainingSet[, -c(index_conspiracy_familiarity)])[, -1]
  x_predictors_train_all_predict.opinion <- data.matrix(TrainingSet[, -c(index_conspiracy_opinions)])[, -1]

  for (theory_position in seq_along(lasso_theories)) {
    theory_name <- lasso_theories[theory_position]
    theory_fit <- cv.glmnet(x = x_predictors_train_all_predict.familiar, 
                            y = y_conspiracy_familiarity_train[, theory_position], 
                            nlambda = 100,
                            nfolds = 10, 
                            alpha = 1, 
                            standardize = TRUE, 
                            family = "binomial")
    theory_coef_min <- coef(theory_fit, s = theory_fit$lambda.min)
    theory_coef_1se <- coef(theory_fit, s = theory_fit$lambda.1se)
    theory_coef_used <- if (lasso_selection_rule[[theory_name]] == "lambda.min") {
      theory_coef_min
    } else {
      theory_coef_1se
    }
    theory_locator <- which(abs(theory_coef_used) > 0)
    # Row 1 of the coefficient vector is the intercept: subtracting 1 maps the
    # remaining rows onto predictor columns, and [-1] drops the first entry.
    lasso_selected[[theory_name]] <- c(lasso_selected[[theory_name]], (theory_locator - 1)[-1])

    lasso_last_fit[[theory_name]] <- theory_fit
    lasso_last_coef_min[[theory_name]] <- theory_coef_min
    lasso_last_coef_1se[[theory_name]] <- theory_coef_1se
    lasso_last_locator[[theory_name]] <- theory_locator
  }
}

# Publish the results under the per-theory variable names used previously
# (store_coefficients_*, lasso_*, ceof_*, ceof_*2, coeff_*_locator).
invisible(list2env(setNames(lasso_selected, paste0("store_coefficients_", lasso_theories)), envir = environment()))
invisible(list2env(setNames(lasso_last_fit, paste0("lasso_", lasso_theories)), envir = environment()))
invisible(list2env(setNames(lasso_last_coef_min, paste0("ceof_", lasso_theories)), envir = environment()))
invisible(list2env(setNames(lasso_last_coef_1se, paste0("ceof_", lasso_theories, "2")), envir = environment()))
invisible(list2env(setNames(lasso_last_locator, paste0("coeff_", lasso_theories, "_locator")), envir = environment()))

# A predictor is "strong" for a theory when it was selected in every iteration.
threshold <- n_iterations 
lasso_predictor_names <- colnames(x_predictors_train_all_predict.familiar)
lasso_tables <- lapply(lasso_selected, table)
lasso_locators <- lapply(lasso_tables, function(selection_counts) {
  as.numeric(names(selection_counts)[selection_counts >= threshold])
})
# Look names up on the full column-name vector; subsetting the matrix first
# returned NULL whenever exactly one column qualified.
strong_predictors <- lapply(lasso_locators, function(positions) lasso_predictor_names[positions])
strong_predictors

#name the strong predictors 
# Relabel each selection-count table with predictor names instead of positions.
lasso_tables <- lapply(lasso_tables, function(selection_counts) {
  names(selection_counts) <- lasso_predictor_names[as.numeric(names(selection_counts))]
  selection_counts
})
invisible(list2env(setNames(lasso_locators, paste0("locators_", lasso_theories)), envir = environment()))
invisible(list2env(setNames(lasso_tables, paste0("table_", lasso_theories)), envir = environment()))

###PREDICTING OPINIONS 
# Repeated Lasso screen for the seven conspiracy-opinion outcomes (*_b).
# Each iteration re-reads and re-imputes the data, reshuffles the rows, takes
# the first two thirds as training data and fits one cross-validated Lasso
# per outcome. The column positions of the predictors that keep a non-zero
# coefficient are appended to the matching store_coefficients_*_b vector,
# which is tallied after the loop.
# Relies on index_conspiracy_familiarity, index_conspiracy_opinions,
# index_other_endogeneous_variables and full_data_numeric_pre_imputation
# being defined before this point.
store_coefficients_truther_b <- c()
store_coefficients_birther_b <- c()
store_coefficients_fin.crisis_b <- c()
store_coefficients_CFLB_b <- c()
store_coefficients_soros_b <- c()
store_coefficients_iraq_jew_b <- c()
store_coefficients_chemtrail_b <- c()

n_iterations <- 100
for(i in seq_len(n_iterations))
{
  print(i)
  full_data_numeric <- read.csv("chicago_module_reduced.csv")

  #imputation at each iteration 
  # The shuffle order is drawn before the imputation calls.
  SCRAMBLE_INDICES <- sample(nrow(full_data_numeric))
  # Columns 1-100 and 101-150 are imputed in two separate amelia() runs (one
  # imputed dataset each); columns 151 onwards are appended without imputation.
  # `$imputations` is spelled out instead of relying on partial matching.
  imputed_dataseta <- data.frame(amelia(full_data_numeric[,1:100], m=1)$imputations) #incheck=FALSE)
  imputed_datasetb <- data.frame(amelia(full_data_numeric[,101:150], m=1)$imputations)
  imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, full_data_numeric[,151:ncol(full_data_numeric)])
  full_data_numeric <- imputed_dataset_full
  # Restore the original column names after the data.frame()/cbind() round trip.
  colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)

  #setting up the data 
  full_data_numeric <- full_data_numeric[SCRAMBLE_INDICES,] ##Shuffle the Data
  TrainingSet <- full_data_numeric[c(1:floor(2/3*nrow(full_data_numeric))),]
  # The test rows start right after the last training row. Starting at
  # ceiling(2/3 * n) put one row in both sets whenever 2/3 * n was whole.
  TestSet <- full_data_numeric[c((floor(2/3*nrow(full_data_numeric))+1):(nrow(full_data_numeric))),]

  y_conspiracy_familiarity_train <- data.matrix(TrainingSet[,index_conspiracy_familiarity])
  y_conspiracy_opinions_train <- data.matrix(TrainingSet[,index_conspiracy_opinions])
  # Every predictor matrix also drops its first remaining column.
  # NOTE(review): presumably a row-id column written by write.csv - confirm.
  x_predictors_train_no_endogeneous <- data.matrix(TrainingSet[,-c(index_conspiracy_familiarity,
                                                                   index_conspiracy_opinions,
                                                                   index_other_endogeneous_variables)])[,-c(1)]
  x_predictors_train_all_predict.familiar <- data.matrix(TrainingSet[,-c(index_conspiracy_familiarity)])[,-1]
  x_predictors_train_all_predict.opinion <- data.matrix(TrainingSet[,-c(index_conspiracy_opinions)])[,-1]

  # For each outcome: 10-fold cross-validated Lasso (alpha=1, gaussian).
  # Coefficients are extracted at both lambda.min and lambda.1se. which()
  # gives the rows of the coefficient vector that are non-zero; row 1 is the
  # intercept, so subtracting 1 turns rows into predictor column positions
  # and the intercept becomes position 0. setdiff(..., 0) removes exactly the
  # intercept; the previous `[-1]` removed whatever came first, which would
  # have discarded a real predictor if the intercept were ever exactly zero.
  # NOTE(review): outcome column k of y_conspiracy_opinions_train is assumed
  # to follow the order truther_b, birther_b, fin.crisis_b, CFLB_b, soros_b,
  # iraq_jew_b, chemtrail_b in the data file - confirm.

  ##truther_b
  lasso_truther_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,1],
                               nlambda=100,
                               nfolds=10,
                               alpha=1,
                               standardize=TRUE,
                               family="gaussian")
  ceof_truther_b <- coef(lasso_truther_b,s=lasso_truther_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_truther_b2 <- coef(lasso_truther_b,s=lasso_truther_b$lambda.1se)
  coeff_truther_b_locator <- which(abs(ceof_truther_b2)>0)
  store_coefficients_truther_b <- append(store_coefficients_truther_b, setdiff(coeff_truther_b_locator - 1, 0))

  ##birther_b
  lasso_birther_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,2],
                               nlambda=100,
                               nfolds=10,
                               alpha=1,
                               standardize=TRUE,
                               family="gaussian")
  ceof_birther_b <- coef(lasso_birther_b,s=lasso_birther_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_birther_b2 <- coef(lasso_birther_b,s=lasso_birther_b$lambda.1se)
  coeff_birther_b_locator <- which(abs(ceof_birther_b2)>0)
  store_coefficients_birther_b <- append(store_coefficients_birther_b, setdiff(coeff_birther_b_locator - 1, 0))

  ##fin.crisis_b
  lasso_fin.crisis_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,3],
                                  nlambda=100,
                                  nfolds=10,
                                  alpha=1,
                                  standardize=TRUE,
                                  family="gaussian")
  ceof_fin.crisis_b <- coef(lasso_fin.crisis_b,s=lasso_fin.crisis_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_fin.crisis_b2 <- coef(lasso_fin.crisis_b,s=lasso_fin.crisis_b$lambda.1se)
  # NOTE(review): fin.crisis_b is the only outcome that selects on the
  # lambda.min coefficients; every other outcome uses lambda.1se. Left as is
  # because it may be deliberate - confirm.
  coeff_fin.crisis_b_locator <- which(abs(ceof_fin.crisis_b)>0)
  store_coefficients_fin.crisis_b <- append(store_coefficients_fin.crisis_b, setdiff(coeff_fin.crisis_b_locator - 1, 0))

  ##CFLB_b
  lasso_CFLB_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,4],
                            nlambda=100,
                            nfolds=10,
                            alpha=1,
                            standardize=TRUE,
                            family="gaussian")
  ceof_CFLB_b <- coef(lasso_CFLB_b,s=lasso_CFLB_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_CFLB_b2 <- coef(lasso_CFLB_b,s=lasso_CFLB_b$lambda.1se)
  coeff_CFLB_b_locator <- which(abs(ceof_CFLB_b2)>0)
  store_coefficients_CFLB_b <- append(store_coefficients_CFLB_b, setdiff(coeff_CFLB_b_locator - 1, 0))

  ##soros_b
  lasso_soros_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,5],
                             nlambda=100,
                             nfolds=10,
                             alpha=1,
                             standardize=TRUE,
                             family="gaussian")
  ceof_soros_b <- coef(lasso_soros_b,s=lasso_soros_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_soros_b2 <- coef(lasso_soros_b,s=lasso_soros_b$lambda.1se)
  coeff_soros_b_locator <- which(abs(ceof_soros_b2)>0)
  store_coefficients_soros_b <- append(store_coefficients_soros_b, setdiff(coeff_soros_b_locator - 1, 0))

  ##iraq_jew_b
  lasso_iraq_jew_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,6],
                                nlambda=100,
                                nfolds=10,
                                alpha=1,
                                standardize=TRUE,
                                family="gaussian")
  ceof_iraq_jew_b <- coef(lasso_iraq_jew_b,s=lasso_iraq_jew_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_iraq_jew_b2 <- coef(lasso_iraq_jew_b,s=lasso_iraq_jew_b$lambda.1se)
  coeff_iraq_jew_b_locator <- which(abs(ceof_iraq_jew_b2)>0)
  store_coefficients_iraq_jew_b <- append(store_coefficients_iraq_jew_b, setdiff(coeff_iraq_jew_b_locator - 1, 0))

  ##chemtrail_b
  lasso_chemtrail_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion, y=y_conspiracy_opinions_train[,7],
                                 nlambda=100,
                                 nfolds=10,
                                 alpha=1,
                                 standardize=TRUE,
                                 family="gaussian")
  ceof_chemtrail_b <- coef(lasso_chemtrail_b,s=lasso_chemtrail_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_chemtrail_b2 <- coef(lasso_chemtrail_b,s=lasso_chemtrail_b$lambda.1se)
  coeff_chemtrail_b_locator <- which(abs(ceof_chemtrail_b2)>0)
  store_coefficients_chemtrail_b <- append(store_coefficients_chemtrail_b, setdiff(coeff_chemtrail_b_locator - 1, 0))
}

# A predictor is reported only if its column position was stored at least
# `threshold` times across the Lasso iterations.
threshold <- n_iterations

#load in prior data 
# Restores previously saved tallies from `my_list`; this requires `my_list`
# to exist already (e.g. by un-commenting the load() call below). Each table
# is recomputed from the current store_coefficients_*_b vectors further down.
#load("all_opinion_lasso.RData")
table_chemtrail_b <- my_list$table_chemtrail_b
table_CFLB_b <- my_list$table_CFLB_b
table_fin.crisis_b <- my_list$table_fin.crisis_b
table_soros_b <- my_list$table_soros_b
table_iraq_jew_b <- my_list$table_iraq_jew_b
# Fixed copy-paste slip: this used to read my_list$table_iraq_jew_b.
table_truther_b <- my_list$table_truther_b
table_birther_b <- my_list$table_birther_b

# For every outcome: tally the stored positions, keep those whose tally
# reaches `threshold`, and print the matching predictor names. Names are
# looked up with colnames(x)[k]; colnames(x[, k]) printed NULL when exactly
# one predictor qualified, because a one-column subset drops to a vector.

#truther_b
table_truther_b <- table(store_coefficients_truther_b)
locators_truther_b <- as.numeric(rownames(table_truther_b[which(table_truther_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_truther_b]

#birther_b
table_birther_b <- table(store_coefficients_birther_b)
locators_birther_b <- as.numeric(rownames(table_birther_b[which(table_birther_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_birther_b]

#fin.crisis_b
table_fin.crisis_b <- table(store_coefficients_fin.crisis_b)
locators_fin.crisis_b <- as.numeric(rownames(table_fin.crisis_b[which(table_fin.crisis_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_fin.crisis_b]

#CFLB_b
table_CFLB_b <- table(store_coefficients_CFLB_b)
locators_CFLB_b <- as.numeric(rownames(table_CFLB_b[which(table_CFLB_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_CFLB_b]

#soros_b
table_soros_b <- table(store_coefficients_soros_b)
locators_soros_b <- as.numeric(rownames(table_soros_b[which(table_soros_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_soros_b]

#iraq_jew_b
table_iraq_jew_b <- table(store_coefficients_iraq_jew_b)
locators_iraq_jew_b <- as.numeric(rownames(table_iraq_jew_b[which(table_iraq_jew_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_iraq_jew_b]

#chemtrail_b
table_chemtrail_b <- table(store_coefficients_chemtrail_b)
locators_chemtrail_b <- as.numeric(rownames(table_chemtrail_b[which(table_chemtrail_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion)[locators_chemtrail_b]

# Snapshot every object in the workspace into one named list and write it to
# both result files. `my_list` itself is left out of the snapshot: including
# it nested the previous snapshot inside the new one, so the saved files grew
# with every load/save cycle.
my_list <- mget(setdiff(ls(), "my_list"))
save(my_list, file="all_opinion_lasso.RData")
save(my_list, file="all_familarities_lasso.RData")

# Restore the familiarity tallies from the saved workspace list. Requires
# `my_list` to exist (un-comment the load() call to read it from disk).
#load("all_familarities_lasso.RData")
table_chemtrail <- my_list$table_chemtrail
table_CFLB <- my_list$table_CFLB
table_fin.crisis <- my_list$table_fin.crisis
table_soros <- my_list$table_soros
table_iraq_jew <- my_list$table_iraq_jew
# Fixed copy-paste slip: this used to read my_list$table_iraq_jew, which left
# table_truther holding the iraq_jew tallies.
table_truther <- my_list$table_truther
table_birther <- my_list$table_birther
#END standard Lasso on full data 

#create correlation tables (using listwise deletion)
# Builds three correlation matrices from the un-imputed data (opinions only,
# familiarity only, both together) and prints each one through stargazer().
library(stargazer)
full_data_numeric <- read.csv("chicago_module_reduced.csv")
index_conspiracy_familiarity <- which(colnames(full_data_numeric) %in% c("truther", "birther", "fin.crisis", "CFLB",
                                                                         "soros", "iraq_jew", "chemtrail"))
index_conspiracy_opinions <- which(colnames(full_data_numeric) %in% c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b",
                                                                      "soros_b", "iraq_jew_b", "chemtrail_b"))
# No additional endogenous variables are excluded: matching against an empty
# set yields an empty index vector.
index_other_endogeneous_variables <- which(colnames(full_data_numeric) %in% c())
y_conspiracy_familiarity_FULL <- data.matrix(full_data_numeric[,index_conspiracy_familiarity])
y_conspiracy_opinions_FULL <- data.matrix(full_data_numeric[,index_conspiracy_opinions])

# Reverse-code the opinion items (5->1, 4->2, 3->3, 2->4, 1->5) in a single
# step. Recoding one value at a time undid itself: a 5 was set to 1 and then
# the final "1 -> 5" rule turned it back into 5 (likewise 4 -> 2 -> 4), so
# the scale was folded instead of reversed. Only exact values 1..5 are
# touched; NA and any other code are left unchanged.
y_conspiracy_opinions_FULL[] <- ifelse(y_conspiracy_opinions_FULL %in% 1:5,
                                       6 - y_conspiracy_opinions_FULL,
                                       y_conspiracy_opinions_FULL)

# na.omit() drops every row with a missing value in any included column
# before the correlations are computed (listwise deletion).
correlation_table_opinions <- cor(na.omit(y_conspiracy_opinions_FULL))
correlation_table_familarity <- cor(na.omit(y_conspiracy_familiarity_FULL))
correlation_table_both <- cor(na.omit(cbind(y_conspiracy_familiarity_FULL, y_conspiracy_opinions_FULL)))
stargazer(correlation_table_opinions)
stargazer(correlation_table_familarity)
stargazer(correlation_table_both)
#END create correlation table 


###BEGIN PREDICTING OPINIONS on matched dataset with Lasso 
# Read the reduced module once and keep an untouched copy; its column names
# are re-applied to the imputed data inside the loop that follows.
full_data_numeric_pre_imputation <- full_data_numeric <- read.csv("chicago_module_reduced.csv")

# Seed the random-number generator, then draw a row permutation.
set.seed(1244)
SCRAMBLE_INDICES <- sample.int(nrow(full_data_numeric))

#creating the indices 
# Column positions of the seven familiarity items and of the seven opinion
# items (the "_b" columns), in the order they occur in the data.
index_conspiracy_familiarity <- which(is.element(
  names(full_data_numeric),
  c("truther", "birther", "fin.crisis", "CFLB", "soros", "iraq_jew", "chemtrail")
))
index_conspiracy_opinions <- which(is.element(
  names(full_data_numeric),
  c("truther_b", "birther_b", "fin.crisis_b", "CFLB_b", "soros_b", "iraq_jew_b", "chemtrail_b")
))
# No further endogenous variables are listed, so this index is empty.
index_other_endogeneous_variables <- integer(0)

# Empty accumulators, one per opinion outcome; the loop appends the selected
# predictor positions to them.
store_coefficients_truther_b <- NULL
store_coefficients_birther_b <- NULL
store_coefficients_fin.crisis_b <- NULL
store_coefficients_CFLB_b <- NULL
store_coefficients_soros_b <- NULL
store_coefficients_iraq_jew_b <- NULL
store_coefficients_chemtrail_b <- NULL

# Number of impute / match / Lasso repetitions.
n_iterations <- 100
# Repeated Lasso screen on matched samples. Each iteration re-reads and
# re-imputes the data, builds one matched dataset per conspiracy-familiarity
# item (the "treatment"), and fits a cross-validated Lasso of the
# corresponding opinion item on all remaining columns of that matched
# dataset. The positions of the predictors with non-zero coefficients are
# appended to the matching store_coefficients_*_b vector.
for(i in seq_len(n_iterations))
{
  print(i)
  full_data_numeric <- read.csv("chicago_module_reduced.csv")

  #imputation at each iteration 
  # NOTE(review): SCRAMBLE_INDICES is not used anywhere in this loop. The
  # call is kept because it consumes random numbers, so removing it would
  # change every subsequent draw.
  SCRAMBLE_INDICES <- sample(nrow(full_data_numeric))
  # Columns 1-100 and 101-150 are imputed in two separate amelia() runs (one
  # imputed dataset each); columns 151 onwards are appended without imputation.
  # `$imputations` is spelled out instead of relying on partial matching.
  imputed_dataseta <- data.frame(amelia(full_data_numeric[,1:100], m=1)$imputations) #incheck=FALSE)
  imputed_datasetb <- data.frame(amelia(full_data_numeric[,101:150], m=1)$imputations)
  imputed_dataset_full <- cbind(imputed_dataseta, imputed_datasetb, full_data_numeric[,151:ncol(full_data_numeric)])
  full_data_numeric <- imputed_dataset_full
  # Restore the original column names after the data.frame()/cbind() round trip.
  colnames(full_data_numeric) <- colnames(full_data_numeric_pre_imputation)


  #create the matched datasets 
  # Nearest-neighbour Mahalanobis matching on the same six covariates for
  # every familiarity item. match.data() appends bookkeeping columns to the
  # original data; they are removed by keeping exactly the original columns
  # by name, instead of dropping "the last two" by position, which depends on
  # how many columns the installed MatchIt version adds.
  # NOTE(review): only birther, truther and fin.crisis pass discard="treat";
  # the other four items do not - confirm this difference is intended.
  match_birther <-  matchit(birther~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                            data=full_data_numeric,
                            method="nearest",
                            distance="mahalanobis",
                            discard="treat")
  match_birther_dataset <- match.data(match_birther)
  match_birther_dataset <- match_birther_dataset[,colnames(full_data_numeric)]

  match_truther <-  matchit(truther~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                            data=full_data_numeric,
                            method="nearest",
                            distance="mahalanobis",
                            discard="treat")
  match_truther_dataset <- match.data(match_truther)
  match_truther_dataset <- match_truther_dataset[,colnames(full_data_numeric)]

  match_fin.crisis <-  matchit(fin.crisis~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                               data=full_data_numeric,
                               method="nearest",
                               distance="mahalanobis",
                               discard="treat")
  match_fin.crisis_dataset <- match.data(match_fin.crisis)
  match_fin.crisis_dataset <- match_fin.crisis_dataset[,colnames(full_data_numeric)]

  match_soros <-  matchit(soros~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                          data=full_data_numeric,
                          method="nearest",
                          distance="mahalanobis")
  match_soros_dataset <- match.data(match_soros)
  match_soros_dataset <- match_soros_dataset[,colnames(full_data_numeric)]


  match_iraq_jew <-  matchit(iraq_jew~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                             data=full_data_numeric,
                             method="nearest",
                             distance="mahalanobis")
  match_iraq_jew_dataset <- match.data(match_iraq_jew)
  match_iraq_jew_dataset <- match_iraq_jew_dataset[,colnames(full_data_numeric)]


  match_chemtrail <-  matchit(chemtrail~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                              data=full_data_numeric,
                              method="nearest",
                              distance="mahalanobis")
  match_chemtrail_dataset <- match.data(match_chemtrail)
  match_chemtrail_dataset <- match_chemtrail_dataset[,colnames(full_data_numeric)]


  match_CFLB <-  matchit(CFLB~female+black+liberal+endtimes_followup+attitude2_good_v_evil+consp1_more_informed,
                         data=full_data_numeric,
                         method="nearest",
                         distance="mahalanobis")
  match_CFLB_dataset <- match.data(match_CFLB)
  match_CFLB_dataset <- match_CFLB_dataset[,colnames(full_data_numeric)]

  # For each outcome: the response is one opinion column of the matched data,
  # the predictors are all non-opinion columns minus the first remaining one.
  # The model is a 10-fold cross-validated Lasso (alpha=1, gaussian).
  # which() gives the non-zero rows of the coefficient vector; row 1 is the
  # intercept, so subtracting 1 maps rows to predictor positions and the
  # intercept to 0. setdiff(..., 0) removes exactly the intercept, whereas
  # the previous `[-1]` removed whatever came first and would have discarded
  # a real predictor if the intercept were ever exactly zero.
  # NOTE(review): opinion column k is assumed to follow the order truther_b,
  # birther_b, fin.crisis_b, CFLB_b, soros_b, iraq_jew_b, chemtrail_b in the
  # data file - confirm.

  ##truther_b
  truther_b_MATCHED <- data.matrix(match_truther_dataset[,index_conspiracy_opinions])[,1]
  x_predictors_train_all_predict.opinion_MATCHED_on_truther <- data.matrix(match_truther_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_truther_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_truther, y=truther_b_MATCHED,
                               nlambda=100,
                               nfolds=10,
                               alpha=1,
                               standardize=TRUE,
                               family="gaussian")
  ceof_truther_b <- coef(lasso_truther_b,s=lasso_truther_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_truther_b2 <- coef(lasso_truther_b,s=lasso_truther_b$lambda.1se)
  coeff_truther_b_locator <- which(abs(ceof_truther_b2)>0)
  store_coefficients_truther_b <- append(store_coefficients_truther_b, setdiff(coeff_truther_b_locator - 1, 0))

  ##birther_b
  birther_b_MATCHED <- data.matrix(match_birther_dataset[,index_conspiracy_opinions])[,2]
  x_predictors_train_all_predict.opinion_MATCHED_on_birther <- data.matrix(match_birther_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_birther_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_birther, y=as.matrix(birther_b_MATCHED),
                               nlambda=100,
                               nfolds=10,
                               alpha=1,
                               standardize=TRUE,
                               family="gaussian")
  ceof_birther_b <- coef(lasso_birther_b,s=lasso_birther_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_birther_b2 <- coef(lasso_birther_b,s=lasso_birther_b$lambda.1se)
  coeff_birther_b_locator <- which(abs(ceof_birther_b2)>0)
  store_coefficients_birther_b <- append(store_coefficients_birther_b, setdiff(coeff_birther_b_locator - 1, 0))

  ##fin.crisis_b
  fin.crisis_b_MATCHED <- data.matrix(match_fin.crisis_dataset[,index_conspiracy_opinions])[,3]
  x_predictors_train_all_predict.opinion_MATCHED_on_fin.crisis <- data.matrix(match_fin.crisis_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_fin.crisis_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_fin.crisis, y=fin.crisis_b_MATCHED,
                                  nlambda=100,
                                  nfolds=10,
                                  alpha=1,
                                  standardize=TRUE,
                                  family="gaussian")
  ceof_fin.crisis_b <- coef(lasso_fin.crisis_b,s=lasso_fin.crisis_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_fin.crisis_b2 <- coef(lasso_fin.crisis_b,s=lasso_fin.crisis_b$lambda.1se)
  # NOTE(review): fin.crisis_b is the only outcome that selects on the
  # lambda.min coefficients; every other outcome uses lambda.1se. Left as is
  # because it may be deliberate - confirm.
  coeff_fin.crisis_b_locator <- which(abs(ceof_fin.crisis_b)>0)
  store_coefficients_fin.crisis_b <- append(store_coefficients_fin.crisis_b, setdiff(coeff_fin.crisis_b_locator - 1, 0))

  ##CFLB_b
  CFLB_b_MATCHED <- data.matrix(match_CFLB_dataset[,index_conspiracy_opinions])[,4]
  x_predictors_train_all_predict.opinion_MATCHED_on_CFLB <- data.matrix(match_CFLB_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_CFLB_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_CFLB, y=CFLB_b_MATCHED,
                            nlambda=100,
                            nfolds=10,
                            alpha=1,
                            standardize=TRUE,
                            family="gaussian")
  ceof_CFLB_b <- coef(lasso_CFLB_b,s=lasso_CFLB_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_CFLB_b2 <- coef(lasso_CFLB_b,s=lasso_CFLB_b$lambda.1se)
  coeff_CFLB_b_locator <- which(abs(ceof_CFLB_b2)>0)
  store_coefficients_CFLB_b <- append(store_coefficients_CFLB_b, setdiff(coeff_CFLB_b_locator - 1, 0))

  ##soros_b
  soros_b_MATCHED <- data.matrix(match_soros_dataset[,index_conspiracy_opinions])[,5]
  x_predictors_train_all_predict.opinion_MATCHED_on_soros <- data.matrix(match_soros_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_soros_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_soros, y=soros_b_MATCHED,
                             nlambda=100,
                             nfolds=10,
                             alpha=1,
                             standardize=TRUE,
                             family="gaussian")
  ceof_soros_b <- coef(lasso_soros_b,s=lasso_soros_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_soros_b2 <- coef(lasso_soros_b,s=lasso_soros_b$lambda.1se)
  coeff_soros_b_locator <- which(abs(ceof_soros_b2)>0)
  store_coefficients_soros_b <- append(store_coefficients_soros_b, setdiff(coeff_soros_b_locator - 1, 0))

  ##iraq_jew_b
  iraq_jew_b_MATCHED <- data.matrix(match_iraq_jew_dataset[,index_conspiracy_opinions])[,6]
  x_predictors_train_all_predict.opinion_MATCHED_on_iraq_jew <- data.matrix(match_iraq_jew_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_iraq_jew_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_iraq_jew, y=iraq_jew_b_MATCHED,
                                nlambda=100,
                                nfolds=10,
                                alpha=1,
                                standardize=TRUE,
                                family="gaussian")
  ceof_iraq_jew_b <- coef(lasso_iraq_jew_b,s=lasso_iraq_jew_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_iraq_jew_b2 <- coef(lasso_iraq_jew_b,s=lasso_iraq_jew_b$lambda.1se)
  coeff_iraq_jew_b_locator <- which(abs(ceof_iraq_jew_b2)>0)
  store_coefficients_iraq_jew_b <- append(store_coefficients_iraq_jew_b, setdiff(coeff_iraq_jew_b_locator - 1, 0))

  ##chemtrail_b
  chemtrail_b_MATCHED <- data.matrix(match_chemtrail_dataset[,index_conspiracy_opinions])[,7]
  x_predictors_train_all_predict.opinion_MATCHED_on_chemtrail <- data.matrix(match_chemtrail_dataset[,-c(index_conspiracy_opinions)])[,-1]

  lasso_chemtrail_b <- cv.glmnet(x=x_predictors_train_all_predict.opinion_MATCHED_on_chemtrail, y=chemtrail_b_MATCHED,
                                 nlambda=100,
                                 nfolds=10,
                                 alpha=1,
                                 standardize=TRUE,
                                 family="gaussian")
  ceof_chemtrail_b <- coef(lasso_chemtrail_b,s=lasso_chemtrail_b$lambda.min)#use this, since everything isn't set to zero. 
  ceof_chemtrail_b2 <- coef(lasso_chemtrail_b,s=lasso_chemtrail_b$lambda.1se)
  coeff_chemtrail_b_locator <- which(abs(ceof_chemtrail_b2)>0)
  store_coefficients_chemtrail_b <- append(store_coefficients_chemtrail_b, setdiff(coeff_chemtrail_b_locator - 1, 0))
}

# A predictor is reported only if its column position was stored at least
# `threshold` times across the matched-Lasso iterations.
threshold <- n_iterations

#load in prior data 
# Restores previously saved tallies from `my_list`; this requires `my_list`
# to exist already (e.g. by un-commenting the load() call below). Each table
# is recomputed from the current store_coefficients_*_b vectors further down.
#load("all_opinion_lasso.RData")
table_chemtrail_b <- my_list$table_chemtrail_b
table_CFLB_b <- my_list$table_CFLB_b
table_fin.crisis_b <- my_list$table_fin.crisis_b
table_soros_b <- my_list$table_soros_b
table_iraq_jew_b <- my_list$table_iraq_jew_b
# Fixed copy-paste slip: this used to read my_list$table_iraq_jew_b.
table_truther_b <- my_list$table_truther_b
table_birther_b <- my_list$table_birther_b

# For every outcome: tally the stored positions, keep those whose tally
# reaches `threshold`, and print the matching predictor names from that
# outcome's matched predictor matrix. Names are looked up with
# colnames(x)[k]; colnames(x[, k]) printed NULL when exactly one predictor
# qualified, because a one-column subset drops to a vector.

#truther_b
table_truther_b <- table(store_coefficients_truther_b)
locators_truther_b <- as.numeric(rownames(table_truther_b[which(table_truther_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_truther)[locators_truther_b]

#birther_b
table_birther_b <- table(store_coefficients_birther_b)
locators_birther_b <- as.numeric(rownames(table_birther_b[which(table_birther_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_birther)[locators_birther_b]

#fin.crisis_b
table_fin.crisis_b <- table(store_coefficients_fin.crisis_b)
locators_fin.crisis_b <- as.numeric(rownames(table_fin.crisis_b[which(table_fin.crisis_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_fin.crisis)[locators_fin.crisis_b]

#CFLB_b
table_CFLB_b <- table(store_coefficients_CFLB_b)
locators_CFLB_b <- as.numeric(rownames(table_CFLB_b[which(table_CFLB_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_CFLB)[locators_CFLB_b]

#soros_b
table_soros_b <- table(store_coefficients_soros_b)
locators_soros_b <- as.numeric(rownames(table_soros_b[which(table_soros_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_soros)[locators_soros_b]

#iraq_jew_b
table_iraq_jew_b <- table(store_coefficients_iraq_jew_b)
locators_iraq_jew_b <- as.numeric(rownames(table_iraq_jew_b[which(table_iraq_jew_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_iraq_jew)[locators_iraq_jew_b]

#chemtrail_b
# Removed a stray "?" after `threshold` that made this line a syntax error.
table_chemtrail_b <- table(store_coefficients_chemtrail_b)
locators_chemtrail_b <- as.numeric(rownames(table_chemtrail_b[which(table_chemtrail_b>=threshold)]))
colnames(x_predictors_train_all_predict.opinion_MATCHED_on_chemtrail)[locators_chemtrail_b]

# Restore the familiarity tallies from the saved workspace list.
table_chemtrail <- my_list$table_chemtrail
table_CFLB <- my_list$table_CFLB
table_fin.crisis <- my_list$table_fin.crisis
table_soros <- my_list$table_soros
table_iraq_jew <- my_list$table_iraq_jew
# Fixed copy-paste slip: this used to read my_list$table_iraq_jew, which left
# table_truther holding the iraq_jew tallies.
table_truther <- my_list$table_truther
table_birther <- my_list$table_birther
#END Lasso on Matched dataset 
